/* 
Programs required:

ssc install unique, replace
ssc install estout, replace
*/

/*
Order of running files for the national time-series analysis
(1) ConstructingMW-annual.do
(2) This file
*/

/*
Inputs required
(1) GDPDEF_qtr from FRED
(2) cps_00095.dta from IPUMS CPS
(3) fedmin.dta constructed in ConstructingMW-annual.do
(4) state_year_combo.dta constructed in ConstructingMW-annual.do
(5) march-price-quantity-exp-all.dta from AKK replication package
(6) min-unemp-6305.dta from AKK replication package
*/

* ----------------------------------------------------------------------
* Session setup: reset Stata, raise the size limits, set the graph font,
* and move to the replication Data directory. Every Input/ and Output/
* path used below is relative to that directory.
* ----------------------------------------------------------------------
clear all
pause off
set matsize 5000
set maxvar 20000

graph set window fontface "Georgia"

* The data directory can be overridden without editing this file by
* defining the global CM_DATA_DIR before running it, for example
*   global CM_DATA_DIR "C:\Replication\Data"
* When the global is empty the original location is used, so the default
* behavior is unchanged.
if "$CM_DATA_DIR" == "" {
	global CM_DATA_DIR "/Users/jvogel/Dropbox/JvogelPrivateWork/Cannonical_Model_MW/Replication/Data"
}
cd "$CM_DATA_DIR"

************************
*CREATING GDP deflator from FRED data
************************
* Annual GDP deflator: average the FRED GDPDEF observations within each
* calendar year and keep exactly one row per year.
clear
import delimited Input/GDPDEF_qtr.csv
generate year = real(substr(date, 1, 4))   // first four characters of date are read as the year
bysort year: egen annual_mean = mean(gdpdef)
replace gdpdef = annual_mean
keep year gdpdef
duplicates drop                             // rows within a year are now identical
save Output/gdpDeflatorFRED_year.dta, replace

************************
*Bringing in March CPS data
************************

use Input/cps_00095.dta, clear

* Survey year = work year + 1, so survey years through 2017 are retained.
* year is converted to the work year further below, after the cleaning
* steps that are defined in terms of the survey year.
drop if year>2017

************************
*SAMPLE SELECTION 1
************************

* Age: earnings refer to the previous year, so age is shifted back by one
* and only ages 16-64 (last year) are kept.
_strip_labels age
replace age = age - 1
keep if inrange(age, 16, 64)

* Education missing
drop if educ<=1 | educ==999 | inlist(higrade, 0, 999)

* 3/8 file 2014 (hflag==1), negative weights, and missing weeks worked
* in the pre-1976 surveys (wkswork2==9)
drop if hflag==1 | asecwt<0 | (wkswork2==9 & year<1976)

************************
*Create labor bins
************************

* Five education groups:
*   1 less than high school degree or equivalent
*   2 high school degree or equivalent
*   3 some college
*   4 four years of college
*   5 more than four years of college or a graduate degree
generate edu5 = 0

* survey years up to 1991 are classified on higrade
replace edu5 = 1 if year<=1991 & higrade<=141
replace edu5 = 2 if year<=1991 & higrade==150
replace edu5 = 3 if year<=1991 & inrange(higrade, 151, 181)
replace edu5 = 4 if year<=1991 & higrade==190
replace edu5 = 5 if year<=1991 & higrade>=191

* survey years from 1992 on are classified on educ
replace edu5 = 1 if year>=1992 & educ<=71
replace edu5 = 2 if year>=1992 & educ==73
replace edu5 = 3 if year>=1992 & inrange(educ, 80, 92)
replace edu5 = 4 if year>=1992 & educ==111
replace edu5 = 5 if year>=1992 & educ>=120
assert edu5~=.

* College indicator: groups 4 and 5. The assert stops the run if any
* observation was left unclassified (edu5 still 0).
assert inrange(edu5, 1, 5)
generate col = inlist(edu5, 4, 5)
label variable col "CLG and GTC +"
assert col~=.

* Gender (female vs. male) and race (white vs. non-white) indicators
assert sex~=.
generate female = (sex==2)
assert race~=. & race~=999
generate white = (race==100)

* Age bins: 16-25, 26-35, 36-45, 46-55 (10 years each) and 56-64 (9 years)
generate agebin = .
replace agebin = 1 if age<26
replace agebin = 2 if age>=26 & age<36
replace agebin = 3 if age>=36 & age<46
replace agebin = 4 if age>=46 & age<56
replace agebin = 5 if age>=56
assert agebin~=.

* Labor bin = intersection of the four characteristics, encoded as the
* concatenation of their (single-digit) codes in a string.
assert agebin~=. & female~=. & white~=. & edu5~=.
tostring agebin female white edu5, replace
generate lbin = agebin + female + white + edu5
label variable lbin "agebin female white edu5"


************************
*BODIES BY LABOR BIN: for a measure of supply, must be before sample selection 2
************************

* Weighted head count per labor bin and (survey) year. This is the
* population supply measure, so it is computed before the earnings-based
* sample restrictions below remove observations.
bysort lbin year: egen bodies_ly = sum(asecwt)

************************
*SAMPLE SELECTION 2: allocated income, missing income, non wage workers
************************

* Allocated income amounts are dropped (an allocated income *type* or
* recipiency alone does not trigger a drop):
*   qinclong  - income in longest job (1988-18)
*   qoincwage - other wage income (1988-18)
*   qincwage  - incwage (1968-87)
drop if qinclong==1 | qoincwage==1 | qincwage==1

* Income variable missing or not in universe
drop if incwage==9999999 | incwage==9999998 | oincwage==9999999 | inclongj==999999

* Negative income: incwage before 1988, its two components from 1988 on
drop if (incwage<0 & year<1988) | ((oincwage<0 | inclongj<0) & year>=1988)

* Wage and salary workers only
keep if inrange(classwly, 20, 28)

************************
*DEALING W/ INCWAGE TOPCODING (all yrs), WEEKS WORKED INTERVALLING (pre 76), uhrsworkly (pre 1976)
************************

* Earnings at or above a year-specific threshold are replaced by 1.5 times
* that threshold. year is still the survey year at this point.

* Survey years up to 1987: a single wage income variable (incwage)
replace incwage=99900*1.5 if incwage>=99900 & year<=1967
replace incwage=50000*1.5 if incwage>=50000 & year>=1968 & year<=1981
replace incwage=75000*1.5 if incwage>=75000 & year>=1982 & year<=1984
replace incwage=99999*1.5 if incwage>=99999 & year>=1985 & year<=1987

* Survey years 1988+: earnings on the longest job (inclongj).
* The condition has to test inclongj itself. Testing total incwage (as this
* code previously did) also overwrote inclongj for workers whose longest-job
* earnings were below the threshold but whose total (longest job + other
* wage income) reached it, which inflated their earnings.
replace inclongj=99997*1.5 if inclongj>=99997 & year>=1988 & year<=1995
replace inclongj=150000*1.5 if inclongj>=150000 & year>=1996 & year<=2002
replace inclongj=200000*1.5 if inclongj>=200000 & year>=2003 & year<=2010
replace inclongj=250000*1.5 if inclongj>=250000 & year>=2011 & year<=2014
replace inclongj=280000*1.5 if inclongj>=280000 & year==2015
replace inclongj=300000*1.5 if inclongj>=300000 & year>=2016 & year<=2018

* Survey years 1988+: other wage income (oincwage)
replace oincwage=99997*1.5 if oincwage>=99997 & year>=1988 & year<=1995
replace oincwage=25000*1.5 if oincwage>=25000 & year>=1996 & year<=2002
replace oincwage=35000*1.5 if oincwage>=35000 & year>=2003 & year<=2010
replace oincwage=47000*1.5 if oincwage>=47000 & year==2011
replace oincwage=50000*1.5 if oincwage>=50000 & year>=2012 & year<=2013
replace oincwage=46000*1.5 if oincwage>=46000 & year==2014
replace oincwage=56000*1.5 if oincwage>=56000 & year==2015
replace oincwage=55000*1.5 if oincwage>=55000 & year>=2016 & year<=2017
replace oincwage=56000*1.5 if oincwage>=56000 & year==2018

* From 1988 on, total wage income is rebuilt from the two adjusted components
replace incwage=inclongj+oincwage if year>=1988

* Before 1976 only the intervalled weeks variable (wkswork2) is used:
* each interval code is mapped to a single number of weeks.
replace wkswork1=0 if wkswork2==0 & year<1976
replace wkswork1=7 if wkswork2==1 & year<1976
replace wkswork1=20 if wkswork2==2 & year<1976
replace wkswork1=33 if wkswork2==3 & year<1976
replace wkswork1=43.5 if wkswork2==4 & year<1976
replace wkswork1=48.5 if wkswork2==5 & year<1976
replace wkswork1=51 if wkswork2==6 & year<1976

* Usual weekly hours before 1976 are predicted from a regression estimated
* on the 1976-1978 surveys. edu5, female and white were converted to strings
* when lbin was built, so they are turned back into numbers for the i. terms.
destring edu5 female white age, replace
reg uhrsworkly fullpart i.edu5 i.female i.white i.age if year>=1976 & year<=1978
predict UHRSWORKLY
replace uhrsworkly=UHRSWORKLY if year<1976

* Annual hours = usual weekly hours x weeks worked
gen hrs_week_ly=uhrsworkly*wkswork1

************************
*Changing year from survey year to work year (must occur after data cleaning!)
************************

* From here on, year is the year in which income was earned
replace year = year - 1
label variable year "Year Income Earned"

************************
*Getting data in final form for Canonical Model except composition adjusting
************************

* Weighted totals of hours and earnings per labor bin and year. col and
* bodies_ly are carried along as by-variables; the assert confirms they do
* not split any lbin-year cell into more than one row.
replace hrs_week_ly = hrs_week_ly*asecwt
replace incwage = incwage*asecwt
collapse (sum) hrs_week_ly incwage, by(lbin year col bodies_ly)
bysort lbin year col: assert _N==1

rename (hrs_week_ly incwage) (hours inc)

* Nominal to real using the annual deflator built earlier in this file
* (2012 dollars, per the original note). Every year in the data must find
* a deflator; deflator-only years are discarded.
merge m:1 year using Output/gdpDeflatorFRED_year.dta
assert _merge~=1
keep if _merge==3
drop _merge
replace inc = 100*inc/gdpdef
drop gdpdef

************************
/*
	Composition adjusting (CA): approach is to 
(a) first check whether a given lbin fails to exist in one or more years
	if any are missing, then a fixed-over-time weight on it must be zero (i.e., drop it from the analysis)
	-- otherwise, the sum of weights is not one in each year w/in college or non-college
	** I've never seen this dealt with in others' codes or discussed in papers. But it must be. Otherwise weights don't sum to one in some years, and not a proper average.
	** This is not an issue for state level analysis, which occurs at the lbin level.
(b) construct total income of college and non-college (just the raw data after adjusting for top code issues and dropping of lbins)
(c) construct CA wages of college and non-college in standard way (using fixed-over-time weights defined by hours for each lbin)
(d1) construct CA supply of college and non-college supply (dual) to make income and wages consistent (perfect in the canonical model, but problematic with monopsony)
(d2) construct CA supply of college and non-college supply (hours worked) using fixed-over-time weights that depend on hours and wages across years
(d3) construct CA supply of college and non-college supply (population) using fixed over time weights and fixed-over-time relative wages
*/
************************

*** (a) drop lbin(s) for which there is at least one year for which it doesn't exist
* The data hold one row per lbin-year here, so the number of rows of an lbin
* is the number of years in which it appears; 2016-1963+1 is the full span.
bys lbin: gen count_lbin=_N
unique lbin if count_lbin<2016 - 1963 +1   // reports how many lbins are incomplete
drop if count_lbin<2016 - 1963 +1
drop count_lbin

*** (b) construct total income of college and non-college
bys col year: egen Inc=sum(inc)
gen lnInc=ln(Inc)

*** (c) CA wage for college and non-college
*Fixed-over-time weights across lbins using avg across years of share of hours worked
assert col~=.
bys year: egen hours_cy=sum(hours*col)       // total college hours in the year
bys year: egen hours_ny=sum(hours*(1-col))   // total non-college hours in the year
gen weight_ly=0
replace weight_ly=hours/hours_cy if col==1   // lbin share of its group's hours in that year
replace weight_ly=hours/hours_ny if col==0
assert weight_ly~=.
bys lbin: egen weight=mean(weight_ly)        // fixed weight: average share across years
*checking weights: the FIXED weights must sum to one within college group and year.
* The check has to be run on weight. It previously summed the abbreviation
* weight_l, which resolves to weight_ly; those year-specific shares sum to one
* by construction, so the fixed weights were never actually verified.
bys col year: egen check=sum(weight)
assert abs(check-1)<0.000001
drop hours_cy hours_ny weight_ly check
* CA wage for college and non-college
gen wage=inc/hours // average wage by lbin (not average of log wage)
gen lnwage=ln(wage) // construct the log of the average wage at the group level
bys col year: egen lnW=sum(weight*lnwage) // then the average of the log at the college level
label variable lnW "CA average ln wage by college"

*** (d) CA supply for college and non-college
** (d1)  dual measure: log total income minus the CA log wage
gen lnDual=lnInc - lnW
label variable lnDual "Dual supply by college"
** (d2) hours measure
bys col year: egen denom_cy=sum(wage*weight) //  weighted avg wage (using fixed-over time hours weights from above) w/in col by year
gen Wage_ly=wage/denom_cy  // labor bin l's wage in t relative to the weighted avg (fixed weights) in the relevant college group in that year
bys lbin: egen Wage_l=mean(Wage_ly)  // averaging this relative wage across years
*checking that a weighted avg of it sums to one within a year (by col)
bys col year: egen check=sum(weight*Wage_l)
assert abs(check-1)<0.00001
drop Wage_ly denom_cy check
label variable Wage_l "avg across years of wage relative to avg of col group" // using same weights as in CA wage construction
bys col year: egen Hours=sum(hours*Wage_l*weight)
gen lnHours=ln(Hours)
** (d3) population measure (where population is measured before sample selection 2)
* bodies is an abbreviation of bodies_ly, the weighted head count per lbin-year
bys col year: egen Bodies=sum(bodies*weight*Wage_l)
gen lnPop=ln(Bodies)

** Keep one observation per college - year

* The aggregates are constant within a (year, college group) cell, so any
* single row per cell carries all the information.
bysort year col: keep if _n==1
keep year col lnW lnDual lnHours lnPop

* One row per year; suffix 1 = college, suffix 0 = non-college
reshape wide lnW lnDual lnHours lnPop, i(year) j(col)

* College minus non-college log differences
generate w      = lnW1     - lnW0
generate sDual  = lnDual1  - lnDual0
generate sHours = lnHours1 - lnHours0
generate sPop   = lnPop1   - lnPop0

label variable w      "ln skill premium"
label variable sDual  "ln rel supply of college: dual of wage"
label variable sHours "ln rel supply of college: CA hours"
label variable sPop   "ln rel supply of college: CA pop"

* Linear time index (equals 1 in 1963)
generate t = year - 1962

label variable lnW1 "ln college real wage"
label variable lnW0 "ln non-college real wage"

save Output/CMcomplete.dta, replace

************************************************
* Federal minimum wage, deflated, 1963-2016
************************************************

use "Output/fedmin.dta", clear
keep if inrange(year, 1963, 2016)

* Attach the annual deflator built earlier in this file. Anything that does
* not match must lie outside 1963-2016, and only matched years are kept.
merge 1:1 year using "Output/gdpDeflatorFRED_year.dta"
assert !inrange(year, 1963, 2016) if _merge~=3
keep if _merge==3
replace fedmw = 100*fedmw/gdp
drop _merge gdp
label variable fedmw "real federal mw (gdp deflator)"

save Output/real_fedmin.dta, replace

************************************************
/*Minimum wage weighted by state population share by year, averaged over years in which we have 51 "states" in March CPS*/
************************************************

* Step 1: weighted population (sum of asecwt) by state and survey year
use Input/cps_00095.dta, clear
drop if statefip>56 //drop aggregations and non-states and not identified
drop if hflag==1 // drop if 3/8 file 2014
drop if asecwt<0 // drop negative weights
collapse (sum) asecwt, by(statefip year)

*dropping CPS years (1968-1976) with fewer than 51 `states' (keeping DC in national analysis)
bys year: gen count_y=_N
keep if count_y==51

* construct share of the population in each `state' and then avg w/ state across years
bys year: egen Weight_y=sum(asecwt)
gen weight_sy=asecwt/Weight_y
collapse (mean) weight_sy, by(statefip)

* Step 2: attach the fixed state weights to the state-by-year minimum wage
* panel. The key is renamed to statefips for the merge; "statefip" in the
* merge command is an abbreviation of that name.
rename statefip statefips
merge 1:m statefip using "Output/state_year_combo.dta" // constructed in ConstructingMW_annual.do
assert _merge==3
drop _merge

* Step 3: national series = weighted sum of state minimum wages in each year
rename mw mw_sy
bys year: egen mw=sum(mw_sy*weight)
bys year: keep if _n==1
keep year mw

* Step 4: deflate with the annual deflator constructed above in this file.
* "asser" is spelled out as assert here so the check does not depend on
* command abbreviation.
merge 1:1 year using Output/gdpDeflatorFRED_year.dta // constructed above in this file
assert _merge~=1
assert year<1963 | year>2016 if _merge~=3
drop if _merge~=3
replace mw=100*mw/gdp
drop _merge gdp
label variable mw "real mw (gdp deflator) weighted across states using avg across all year population shares"

save Output/mw_series.dta, replace


************************************************
/*Canonical Model Analysis*/
************************************************

* Assemble the national time series used in all figures and tables:
* relative wages and supplies plus the two minimum wage series.
clear all
use Output/CMcomplete.dta, clear

merge 1:1 year using Output/mw_series.dta // constructed above in this file
assert _merge==3
drop _merge

replace mw = ln(mw)
label variable mw "ln real national mw (using using avg across all year population shares)"

merge 1:1 year using Output/real_fedmin.dta // constructed above in this file
assert _merge~=1
* Unmatched federal minimum wage years are only acceptable outside the
* 1963-2016 window. The upper bound was previously year>=2016, which would
* have let an unmatched 2016 be dropped silently; it now matches the window
* used when the minimum wage series were deflated.
assert (year<1963 | year>2016) if _merge~=3
drop if _merge~=3
drop _merge
replace fedmw=ln(fedmw)
label variable fedmw "ln real federal federal minimum wage"

rename lnW0 w0
rename lnW1 w1

*Normalize 1963 values to zero

* norm_k holds the 1963 value of series k on every row (the sum picks out
* the single 1963 observation), which is then subtracted from the series.
gen Y63=(year==1963)
foreach k in "w" "sDual" "sPop" "sHours" "mw" "fedmw" "w1" "w0" {
 egen norm_`k'=sum(Y63*`k')
 replace `k'=`k'-norm_`k'
 drop norm_`k'
}
drop Y63

label variable year "Year"
label variable mw "ln real minimum wage"
label variable fedmw "ln real federal minimum wage"

//Create polynomials of time

forval j = 2(1)10 {
  gen t`j'=t^`j'
}

****************************************
****************************************
****************************************
* FIGURES
****************************************
****************************************
****************************************



********************
* Plotting the raw data
********************

* --- Relative supply: population and hours measures on one plot ---
label variable sPop "Population measure"
label variable sHours "Hours measure"
twoway ///
  (connected sPop year, ms(O) lwidth(thick) yaxis(1 2) xlabel(,labsize(large))) ///
  (line sHours year, lwidth(thick) yaxis(1 2) xlabel(,labsize(large))), ///
  xtitle("") ///
  ytitle(,axis(1) size(huge)) ytitle(,axis(2) size(huge)) ///
  ylabel(,axis(1) labsize(large) nogrid) ylabel(,axis(2) labsize(large) nogrid) ///
  legend(label(1 "Population") label(2 "Hours") region(style(none)) ring(0) position(12) size(*1.5)) ///
  graphregion(color(white))
graph export "../Figures/supply.pdf", as(pdf) replace

* --- College premium ---
label variable w "College premium"
twoway line w year, ///
  graphregion(color(white)) lwidth(thick) ///
  xtitle("") ytitle(,size(huge)) ///
  xlabel(,labsize(large)) ylabel(,labsize(large) nogrid)
graph export "../Figures/premium.pdf", as(pdf) replace

* --- Minimum wage: baseline (state-weighted) and federal series ---
label variable fedmw "Federal minimum wage"
label variable mw "Minimum wage"
*twoway line mw year, graphregion(color(white)) xtitle("") ytitle(,size(large)) xlabel(,labsize(large)) ylabel(,labsize(large))
twoway ///
  (connected mw year, ms(O) lwidth(thick) yaxis(1 2) xlabel(,labsize(large))) ///
  (line fedmw year, lwidth(thick) yaxis(1 2) xlabel(,labsize(large))), ///
  xtitle("") ///
  ytitle(,axis(1) size(huge)) ytitle(,axis(2) size(huge)) ///
  ylabel(,axis(1) labsize(large) nogrid) ylabel(,axis(2) labsize(large) nogrid) ///
  legend(label(1 "Baseline") label(2 "Federal") region(style(none)) ring(0) position(12) size(*1.5)) ///
  graphregion(color(white))
graph export "../Figures/minwage.pdf", as(pdf) replace

********************
*Plotting residualized patterns (linear time trend)
********************

* --- Premium and relative supply, each residualized on a linear trend and mw ---
regress w t mw
predict w_res_s, residuals
label variable w_res_s "Residualized college premium"

regress sPop t mw
predict s_res, residuals
label variable s_res "Residualized relative supply of college workers"

twoway ///
  (connected w_res_s year, ms(O) lwidth(thick) yaxis(1) xlabel(,labsize(large))) ///
  (line s_res year, lwidth(thick) yaxis(2) xlabel(,labsize(large))), ///
  xtitle("") ///
  ytitle(,axis(1) size(large)) ytitle(,axis(2) size(large)) ///
  ylabel(,axis(1) labsize(large) nogrid) ylabel(,axis(2) labsize(large) nogrid) ///
  legend(label(1 "Residualized" "college premium") label(2 "Residualized" "relative supply") region(style(none)) ring(0) position(6) size(large)) ///
  graphregion(color(white))
graph export "../Figures/residual_supply_premium.pdf", as(pdf) replace

* --- Premium and minimum wage, each residualized on a linear trend and sPop ---
regress w t sPop
predict w_res_mw, residuals
label variable w_res_mw "Residualized college premium"

regress mw t sPop
predict mw_res, residuals
label variable mw_res "Residualized minimum wage"

twoway ///
  (connected w_res_mw year, ms(O) lwidth(thick) yaxis(1) xlabel(,labsize(large))) ///
  (line mw_res year, lwidth(thick) yaxis(2) xlabel(,labsize(large))), ///
  xtitle("") ///
  ytitle(,axis(1) size(large)) ytitle(,axis(2) size(large)) ///
  ylabel(,axis(1) labsize(large) nogrid) ylabel(,axis(2) labsize(large) nogrid) ///
  legend(label(1 "Residualized" "college premium") label(2 "Residualized" "minimum wage") region(style(none)) ring(0) position(6) size(large)) ///
  graphregion(color(white))
graph export "../Figures/residual_mw_premium.pdf", as(pdf) replace

* the residual series are only needed for the two figures above
drop w_res_s s_res w_res_mw mw_res

********************
* PLOTTING OUT-OF-SAMPLE FIT
********************

* Both models are estimated on years up to 1987 only; predict is not
* restricted, so fitted values are produced for every year in the data.

* without the minimum wage
ivreg w (sHours = sPop) t if year<=1987
predict w_KM

* with the minimum wage
ivreg w (sHours = sPop) t mw if year<=1987
predict w_KMmw

twoway ///
  (line w year, lwidth(thick) yaxis(1 2) xlabel(,labsize(large)) xline(1987 , lcolor(black))) ///
  (connected w_KM year, ms(O) lwidth(thick) yaxis(1 2) xlabel(,labsize(large))) ///
  (connected w_KMmw year, ms(T) lwidth(thick) yaxis(1 2) xlabel(,labsize(large))), ///
  xtitle("") ///
  ytitle(,axis(1) size(huge)) ytitle(,axis(2) size(huge)) ///
  ylabel(,axis(1) labsize(large) nogrid) ylabel(,axis(2) labsize(large) nogrid) ///
  legend(label(1 "College premium") label(2 "Predicted w/out mw") label(3 "Predicted with mw") ring(0) position(11) stack) ///
  graphregion(color(white))
graph export "../Figures/KMextend_hours_predict.pdf", as(pdf) replace

drop w_KM w_KMmw

********************
* PLOTTING DETRENDED PREMIA (using third degree polynomial): observed, predicted (separately) by mw + s, mw, s
********************

* Step 1: first-stage prediction of hours supply from sPop, mw and a cubic in t
regress sHours sPop mw t t2 t3
predict sHour_FS

* Step 2: remove a cubic time trend from w, the first-stage supply, and mw
regress w t t2 t3
predict res_w, residuals
regress sHour_FS t t2 t3
predict res_s, residuals
regress mw t t2 t3
predict res_mw, residuals

* Step 3: regress detrended w on detrended supply and mw (should replicate
* the main result with the same time cubic) and store the coefficients
regress res_w res_s res_mw
predict res_w_All
generate bres_s  = _b[res_s]
generate bres_mw = _b[res_mw]
generate res_c   = _b[_cons]
assert abs(res_c)<0.000001  // the constant should be zero

* Step 4: fitted detrended premium from both regressors, mw only, supply only
generate res_w_all = res_s*bres_s + res_mw*bres_mw
generate res_w_mw  = res_mw*bres_mw
generate res_w_s   = res_s*bres_s

* the hand-built fit has to agree with predict
assert abs(res_w_All-res_w_all)<0.00001
drop res_w_All

twoway ///
  (line res_w year, lwidth(thick) yaxis(1 2) xlabel(,labsize(large))) ///
  (line res_w_all year, lwidth(thick) lpattern(dash) yaxis(1 2) xlabel(,labsize(large))) ///
  (line res_w_mw year, lwidth(thick) lpattern(dot) yaxis(1 2) xlabel(,labsize(large))) ///
  (line res_w_s year, lwidth(thick) lpattern(shortdash dot) yaxis(1 2) xlabel(,labsize(large))), ///
  xtitle("") ///
  ytitle(,axis(1) size(huge)) ytitle(,axis(2) size(huge)) ///
  ylabel(,axis(1) labsize(large) nogrid) ylabel(,axis(2) labsize(large) nogrid) ///
  legend(label(1 "Observed (detrended)") label(2 "Both mw and supply") label(3 "Only mw") label(4 "Only supply") region(style(none)) ring(0) position(11)) ///
  graphregion(color(white))
graph export "../Figures/predictedwage.pdf", as(pdf) replace

* variance decomposition of detrended series: each component regressed on the total fit
regress res_w_mw res_w_all
regress res_w_s res_w_all

drop res* bres* sHour_FS


****************************************
****************************************
****************************************
* TABLES
****************************************
****************************************
****************************************

********************
* MAIN TABLE
********************

label variable t      "Time"
label variable sHours "Relative supply of college workers"
label variable mw     "Real minimum wage"
label variable fedmw  "Real federal minimum wage"

* t_keep is a copy of t used in the linear-trend columns only, so that
* keep(t_keep) in esttab shows the trend coefficient for those columns and
* not for the higher-order polynomial columns (which use t).
clonevar t_keep = t
label variable t_keep "Time"

eststo clear

* (1) Katz Murphy (KM) specification and years
eststo: ivreg w (sHours=sPop) t_keep if year<1988, robust first
estadd local tp 1

* (2) KM specification, full sample
eststo: ivreg w (sHours=sPop) t_keep, robust first
estadd local tp 1

* (3) KM years, adding the minimum wage
eststo: ivreg w (sHours=sPop) mw t_keep if year<1988, robust first
estadd local tp 1

* (4) Full model, linear trend
eststo: ivreg w (sHours=sPop) mw t_keep, robust first
estadd local tp 1

* (5) Full model, quadratic trend
eststo: ivreg w (sHours=sPop) mw t t2, robust first
estadd local tp 2

* (6) Full model, cubic trend
eststo: ivreg w (sHours=sPop) mw t t2 t3, robust first
estadd local tp 3

* (7) Baseline with the federal minimum wage in place of mw
eststo: ivreg w (sHours=sPop) fedmw t_keep, robust first
estadd local tp 1

esttab * using "../Figures/table_national.tex", b(3) replace se label ///
  mgroups("1963-1987" "1963-2016" "1963-1987"  "1963-2016", pattern(1 1 1 1 0 0 0) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(1)" "(2)" "(3)" "(4)" "(5)" "(6)" "(7)") ///
  stats(tp N r2, fmt(%9.0fc %9.0fc %9.3fc) labels("Time Polynomial" "Observations" "\textit{R}-squared")) ///
  keep(sHours mw fedmw t_keep _cons) ///
  booktabs nonum nonote order(sHours mw fedmw t_keep _cons) nostar

drop t_keep

********************
* Higher-dimensional polynomials of time in baseline
********************

clonevar t_keep = t
label variable t_keep "Time"
label variable sHours "Relative supply of college workers"

eststo clear

* Baseline 2SLS with a time polynomial of degree 1 to 5. The degree-1 column
* uses t_keep so its trend coefficient is the only one shown in the table;
* the higher degrees use t, t2, ..., t5.
forvalues p = 1/5 {
	if `p' == 1 {
		local trend "t_keep"
	}
	else if `p' == 2 {
		local trend "t t2"
	}
	else {
		local trend "`trend' t`p'"
	}
	eststo: ivreg w (sHours=sPop) mw `trend', robust
	estadd local tp `p'
}

esttab * using "../Figures/table3_sHour.tex", b(3) se(3) replace se label ///
  mgroups(, pattern(1 0 0 0 0) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(1)" "(2)" "(3)" "(4)" "(5)") ///
  stats(tp, fmt(%9.0fc) labels("Time Polynomial")) ///
  keep(sHours mw t_keep) ///
  booktabs nonum nonote order(sHours mw t_keep) nostar

drop t_keep

********************
* Showing main table using two alternative measures of relative supply
********************

****** Reduced-form version using population-based measure

label variable t     "Time"
label variable sPop  "Relative supply of college workers"
label variable mw    "Real minimum wage"
label variable fedmw "Real federal minimum wage"

clonevar t_keep = t
label variable t_keep "Time"

eststo clear

* OLS of the premium on the population-based supply measure.
* (1) Katz Murphy (KM) specification and years
eststo: regress w sPop t_keep if year<1988, robust
estadd local tp 1

* (2) KM specification, full sample
eststo: regress w sPop t_keep, robust
estadd local tp 1

* (3) KM years, adding the minimum wage
eststo: regress w sPop mw t_keep if year<1988, robust
estadd local tp 1

* (4) Full model, linear trend
eststo: regress w sPop mw t_keep, robust
estadd local tp 1

* (5) Full model, quadratic trend
eststo: regress w sPop mw t t2, robust
estadd local tp 2

* (6) Full model, cubic trend
eststo: regress w sPop mw t t2 t3, robust
estadd local tp 3

esttab * using "../Figures/table_national_RF.tex", b(3) replace se label ///
  mgroups("1963-1987" "1963-2016" "1963-1987"  "1963-2016", pattern(1 1 1 1 0 0) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(1)" "(2)" "(3)" "(4)" "(5)" "(6)") ///
  stats(tp N r2, fmt(%9.0fc %9.0fc %9.3fc) labels("Time Polynomial" "Observations" "\textit{R}-squared")) ///
  keep(sPop mw t_keep _cons) ///
  booktabs nonum nonote order(sPop mw fedmw t_keep _cons) nostar

drop t_keep

****** 2SLS version replacing hours-worked with dual-of-wage measure

label variable sDual "Relative supply of college workers"

clonevar t_keep = t
label variable t_keep "Time"

eststo clear

* 2SLS with the dual-of-wage supply measure instrumented by sPop.
* (1) Katz Murphy (KM) specification and years
eststo: ivreg w (sDual=sPop) t_keep if year<1988, robust
estadd local tp 1

* (2) KM specification, full sample
eststo: ivreg w (sDual=sPop) t_keep, robust
estadd local tp 1

* (3) KM years, adding the minimum wage
eststo: ivreg w (sDual=sPop) mw t_keep if year<1988, robust
estadd local tp 1

* (4) Full model, linear trend
eststo: ivreg w (sDual=sPop) mw t_keep, robust
estadd local tp 1

* (5) Full model, quadratic trend
eststo: ivreg w (sDual=sPop) mw t t2, robust
estadd local tp 2

* (6) Full model, cubic trend
eststo: ivreg w (sDual=sPop) mw t t2 t3, robust
estadd local tp 3

esttab * using "../Figures/table_national_dual.tex", b(3) replace se label ///
  mgroups("1963-1987" "1963-2016" "1963-1987"  "1963-2016", pattern(1 1 1 1 0 0) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(1)" "(2)" "(3)" "(4)" "(5)" "(6)") ///
  stats(tp N r2, fmt(%9.0fc %9.0fc %9.3fc) labels("Time Polynomial" "Observations" "\textit{R}-squared")) ///
  keep(sDual mw t_keep _cons) ///
  booktabs nonum nonote order(sDual mw fedmw t_keep _cons) nostar

drop t_keep


****
* Effect of predicted supply 1979-89
****

* These fitted values give the predicted change in supply quoted in the draft.
* sH_p1 ... sH_p5 are the first-stage predictions of sHours from sPop, mw and
* a time polynomial of degree 1 ... 5.
/*
* test for the degree-1 case (the two coefficients on supply should agree):
reg w sH_p1 mw t
ivreg w (sHours=sPop) mw t
*/
local trend "t"
forvalues p = 1/5 {
	if `p' > 1 {
		local trend "`trend' t`p'"
	}
	regress sHours sPop mw `trend'
	predict sH_p`p'
}

********************
* Separate college and non-college regressions
********************

clonevar t_keep = t
label variable t_keep "Time"

label variable sHours "Relative supply"
label variable mw "Real minimum wage"
eststo clear

* For each trend (linear, quadratic, cubic) estimate three models in this
* order: the premium (w), the college wage (w1), the non-college wage (w0).
* The column order must match mgroups() and mtitles() below.
foreach trend in "t_keep" "t t2" "t t2 t3" {
	foreach depvar in w w1 w0 {
		eststo: ivreg `depvar' (sHours=sPop) mw `trend', robust
	}
}

esttab using "../Figures/table_national_level.tex", b(3) replace se label ///
  mgroups("Linear" "Quadratic" "Cubic", pattern(1 0 0 1 0 0 1 0 0) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(Premium)" "(High)" "(Low)" "(Premium)" "(High)" "(Low)" "(Premium)" "(High)" "(Low)") ///
  stats(, ) keep(sHours mw t_keep) ///
  booktabs nonum nonote order(sHours mw t_keep) nostar

drop t_keep

********************
* Use AKK's data
********************

* Part I of the figure: AKK wage and supply series with AKK's minimum wage

use "Input/march-price-quantity-exp-all.dta", clear // From AKK replication package
merge 1:1 year using "Input/min-unemp-6305.dta"  // From AKK replication package
drop _merge

* deflator constructed above in this do file; only years present in both are kept
merge 1:1 year using Output/gdpDeflatorFRED_year.dta
keep if _merge==3
drop _merge

generate mw = ln(100*minimum/gdp)
label variable mw "Minimum wage"
rename clphsg_all w
rename eu_lnclg s
label variable s "Relative supply of college workers"
label variable mw "Real minimum wage"

* time index and its powers 2 to 5
generate t = year - 1962
forvalues p = 2/5 {
	generate t`p' = t^`p'
}

eststo clear

* OLS with a time polynomial of degree 1 to 5
local trend "t"
forvalues p = 1/5 {
	if `p' > 1 {
		local trend "`trend' t`p'"
	}
	eststo: regress w s mw `trend', robust
	estadd local tp `p'
}

esttab * using "../Figures/AKK.tex", b(3) se(3) replace se label ///
  mgroups("Using AKK data", pattern(1 0 0 0 0) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(1)" "(2)" "(3)" "(4)" "(5)") ///
  stats(, ) keep(s mw) ///
  booktabs nonum nolines nonote order(s mw) nostar

* Part II of the figure: same AKK wage and supply series, but with the
* baseline real minimum wage built in this file. Appended to AKK.tex.

use "Input/march-price-quantity-exp-all.dta", clear // From AKK replication package

rename clphsg_all w
rename eu_lnclg s
label variable s "Relative supply of college workers"

* time index and its powers 2 to 7 (created before the merge, so rows that
* come only from the minimum wage file have no time index)
generate t = year - 1962
forvalues p = 2/7 {
	generate t`p' = t^`p'
}

merge 1:1 year using Output/mw_series.dta  // Constructed above in this do file
assert _merge~=1
drop _merge
replace mw = ln(mw)
label variable mw "Real minimum wage"

eststo clear

* OLS with a time polynomial of degree 1 to 5
local trend "t"
forvalues p = 1/5 {
	if `p' > 1 {
		local trend "`trend' t`p'"
	}
	eststo: regress w s mw `trend', robust
	estadd local tp `p'
}

esttab * using "../Figures/AKK.tex", b(3) se(3) append se label ///
  mgroups("Using my baseline real minimum wage", pattern(1 0 0 0 0 ) ///
    prefix(\multicolumn{@span}{c}{) suffix(}) ///
    span erepeat(\cmidrule(lr){@span})) ///
  mtitles("(1)" "(2)" "(3)" "(4)" "(5)") ///
  stats(tp N, fmt(%9.0fc %9.0fc %9.3fc) labels("Time Polynomial" "Observations")) ///
  keep(s mw) ///
  booktabs nonum nonote order(s mw) nostar


/*
** After running this code, must go into AKK.tex and:
1) move toprule from middle (line 13) to just after first line (a new line 2)
2) replace \midrule on line 16 with \addlinespace (leave \midrule on line 22 alone)
*/
